# -*- coding: utf-8 -*-
# Greek-side symbol list (37 entries): the empty string at index 0, the 24
# unaccented lowercase letters, then the accented/diaeresis vowels and the
# final sigma.
greek_ch = (
    [u'']
    + list(u'αβγδεζηθικλμνξοπρστυφχψω')
    + [u'ά', u'έ', u'ί', u'ό', u'ύ', u'ή', u'ώ', u'ς', u'ϊ', u'ΰ', u'ϋ', u'ΐ']
)

# Greeklish-side symbol list (37 entries): the empty string at index 0, the
# letters a-z, then the digits 1-9 followed by 0.
greeklish_ch = [''] + list('abcdefghijklmnopqrstuvwxyz') + list('1234567890')


# The 24 unaccented lowercase Greek letters, alpha through omega
# (final sigma is not included).
greek_parts = list(u'αβγδεζηθικλμνξοπρστυφχψω')

# Greeklish two-letter sequences that may map to a single Greek character.
#
# NOTE: this literal used to list the key 'ei' twice (first u'ι', then u'η').
# Python keeps only the last value for a repeated key in a dict display, so
# the effective mapping has always been 'ei' -> u'η'. The dead u'ι' entry is
# removed; runtime behavior is unchanged.
# NOTE(review): confirm u'η' is the intended target -- dipthongs_rev maps
# u'ει' to 'i', which suggests u'ι' may have been meant.
dipthongs = {
    'ps': u'ψ',
    'ph': u'φ',
    'th': u'θ',
    'ks': u'ξ',
    'ai': u'ε',
    'ei': u'η',
    # doubled consonants collapse to one Greek letter
    'bb': u'β',
    'gg': u'γ',
    'kk': u'κ',
    'll': u'λ',
    'mm': u'μ',
    'nn': u'ν',
    'rr': u'ρ',
    'ss': u'σ',
    'tt': u'τ',
    'ch': u'χ',
}
# Disabled entries, kept for reference: 'oy': u'ου', 'ou': u'ου'

# Greeklish two-letter sequences that may map to a two-letter Greek sequence.
dipthongs_normal = {
    # doubled consonants stay doubled
    'bb': u'ββ',
    'gg': u'γγ',
    'kk': u'κκ',
    'll': u'λλ',
    'mm': u'μμ',
    'nn': u'νν',
    'rr': u'ρρ',
    'ss': u'σσ',
    'tt': u'ττ',
    # letter-by-letter readings of 'ph' and 'th'
    'ph': u'πη',
    'th': u'τη',
}

# Greek two-letter sequences that may map to a shorter Greeklish form
# (a single character in most cases; u'αυ' and u'ευ' map to 'af' and 'ef').
dipthongs_rev = {
    u'αι': 'e',
    u'μπ': 'b',
    u'ββ': 'b',
    u'γγ': 'g',
    u'κκ': 'k',
    u'λλ': 'l',
    u'μμ': 'm',
    u'νν': 'n',
    u'ρρ': 'r',
    u'σσ': 's',
    u'ττ': 't',
    u'ει': 'i',
    u'οι': 'i',
    u'αυ': 'af',
    u'ευ': 'ef',
    u'ου': 'u',
    u'γκ': 'g',
    u'ντ': 'd',
}

# Greek two-letter sequences that may map to a two-letter Greeklish sequence.
dipthongs_rev_normal = {
    u'ββ': 'bb',
    u'γγ': 'gg',
    u'κκ': 'kk',
    u'λλ': 'll',
    u'μμ': 'mm',
    u'νν': 'nn',
    u'ρρ': 'rr',
    u'σσ': 'ss',
    u'ττ': 'tt',
    u'γκ': 'gk',
    u'τη': 'th',
}

# Greeklish tokens singled out for splitting (presumably consumed by
# segmentation code elsewhere -- confirm against callers).
greeklish_split = ['th', 'ps', 'ks', '8', 'j', '3']
# Disabled alternative: dipthongs_split = ['th', 'ps', 'ks']

# Greek-side counterparts of the split tokens.
gr_split = [u'θ', u'ψ', u'ξ', u'τη']
# Disabled alternatives: gr_split = [u'τη']  /  gr_split = []

# Both split lists combined: Greeklish tokens first, then the Greek ones.
special_chars = list(greeklish_split)
special_chars.extend(gr_split)

# Map from accented characters to simple characters, keyed and valued by
# Unicode code point (the shape accepted by unicode/str.translate --
# presumably how it is used; confirm against callers).
simple_ch = {
    0x03AC: 0x03B1,  # alpha with tonos              -> alpha
    0x03AD: 0x03B5,  # epsilon with tonos            -> epsilon
    0x03AF: 0x03B9,  # iota with tonos               -> iota
    0x03CC: 0x03BF,  # omicron with tonos            -> omicron
    0x03CD: 0x03C5,  # upsilon with tonos            -> upsilon
    0x03AE: 0x03B7,  # eta with tonos                -> eta
    0x03CE: 0x03C9,  # omega with tonos              -> omega
    0x03CA: 0x03B9,  # iota with dialytika           -> iota
    0x03CB: 0x03C5,  # upsilon with dialytika        -> upsilon
    0x03B0: 0x03C5,  # upsilon with dialytika+tonos  -> upsilon
    0x0390: 0x03B9,  # iota with dialytika+tonos     -> iota
}

# Per-character weights for Greeklish word clustering.
weights = {
    '3': 30,
    '8': 30,
    'w': 20,
    'h': 15,
    'i': 5,
    'u': 10,
    'y': 10,
    'v': 15,
    'b': 15,
    'x': 8,
}
